//
//  NSString+HTML.h
//  MWFeedParser
//
//  Copyright (c) 2010 Michael Waterfall
//  
//  Permission is hereby granted, free of charge, to any person obtaining a copy
//  of this software and associated documentation files (the "Software"), to deal
//  in the Software without restriction, including without limitation the rights
//  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the Software is
//  furnished to do so, subject to the following conditions:
//  
//  1. The above copyright notice and this permission notice shall be included
//     in all copies or substantial portions of the Software.
//  
//  2. This Software cannot be used to archive or collect data such as (but not
//     limited to) that of events, news, experiences and activities, for the 
//     purpose of any concept relating to diary/journal keeping.
//  
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
//  THE SOFTWARE.
//
import UIKit
import Foundation
// Dependent upon GTMNSString+HTML
extension NSString {

    // MARK: - Shared character lists

    /// Characters treated as line breaks: "\n", "\r", Next Line (U+0085),
    /// Form Feed (U+000C), Line Separator (U+2028) and Paragraph Separator (U+2029).
    private static let newLineCharacterString = "\n\r\u{0085}\u{000C}\u{2028}\u{2029}"

    /// Space and tab plus every character in `newLineCharacterString`.
    private static let newLineAndWhitespaceCharacterString = " \t" + newLineCharacterString

    // MARK: - Instance Methods

    /// Strips HTML tags and comments, collapses whitespace runs into single spaces
    /// and finally decodes HTML character entities.
    ///
    /// Everything between `<script` and `</script>` is dropped. A closing tag is
    /// replaced by a single space unless it is one of the inline tags listed in
    /// `inlineTagNames`; opening tags are removed without inserting anything.
    /// No space is ever added at the very beginning or the very end of the result.
    ///
    /// - Returns: The plain-text form of the receiver.
    func stringByConvertingHTMLToPlainText() -> String {
        // Character sets
        let stopCharacters = CharacterSet(charactersIn: "<" + NSString.newLineAndWhitespaceCharacterString)
        let newLineAndWhitespaceCharacters = CharacterSet(charactersIn: NSString.newLineAndWhitespaceCharacterString)
        let tagNameCharacters = CharacterSet(charactersIn: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

        // Closing tags with these (lower-cased) names are removed without leaving a space behind.
        let inlineTagNames: Set<String> = [
            "a", "b", "i", "q", "span", "em", "strong", "cite", "abbr", "acronym", "label"
        ]

        var result = ""
        let scanner = Scanner(string: self as String)
        // Whitespace is significant here, so the scanner must not skip anything on its own.
        scanner.charactersToBeSkipped = nil
        scanner.caseSensitive = true

        repeat {
            // Copy plain text up to the start of a tag or a run of whitespace.
            var text: NSString?
            if scanner.scanUpToCharacters(from: stopCharacters, into: &text), let text = text {
                result += text as String
            }

            if scanner.scanString("<", into: nil) {
                // Stopped at a comment, script tag, or other tag.
                if scanner.scanString("!--", into: nil) {
                    // Comment: skip through the closing marker.
                    scanner.scanUpTo("-->", into: nil)
                    scanner.scanString("-->", into: nil)
                } else if scanner.scanString("script", into: nil) {
                    // Script content is not escaped HTML, so skip it wholesale.
                    scanner.scanUpTo("</script>", into: nil)
                    scanner.scanString("</script>", into: nil)
                } else {
                    if scanner.scanString("/", into: nil) {
                        // Closing tag: replace with a space unless it is an inline tag.
                        var tagName: NSString?
                        var isInlineTag = false
                        if scanner.scanCharacters(from: tagNameCharacters, into: &tagName), let tagName = tagName {
                            isInlineTag = inlineTagNames.contains((tagName as String).lowercased())
                        }
                        if !isInlineTag && !result.isEmpty && !scanner.isAtEnd {
                            result += " "
                        }
                    }
                    // Scan past the remainder of the tag.
                    scanner.scanUpTo(">", into: nil)
                    scanner.scanString(">", into: nil)
                }
            } else if scanner.scanCharacters(from: newLineAndWhitespaceCharacters, into: nil) {
                // Stopped at whitespace: collapse the whole run into one space,
                // but never at the beginning or end of the result.
                if !result.isEmpty && !scanner.isAtEnd {
                    result += " "
                }
            }
        } while !scanner.isAtEnd

        // Decode HTML entities and return.
        return (result as NSString).stringByDecodingHTMLEntities()
    }

    /// Decodes all HTML entities by delegating to `gtm_stringByUnescapingFromHTML()`
    /// (GTMNSString+HTML).
    ///
    /// - Returns: The unescaped string as a Swift `String` value, so the caller never
    ///   receives a reference to a mutable receiver.
    func stringByDecodingHTMLEntities() -> String {
        return self.gtm_stringByUnescapingFromHTML()
    }

    /// Encodes all HTML entities by delegating to `gtm_stringByEscapingForAsciiHTML()`
    /// (GTMNSString+HTML).
    ///
    /// - Returns: The escaped string as a Swift `String` value.
    func stringByEncodingHTMLEntities() -> String {
        return self.gtm_stringByEscapingForAsciiHTML()
    }

    /// Encodes HTML entities, choosing between the two GTM escaping routines.
    ///
    /// Minimal unicode encoding will only cover characters from table
    /// A.2.2 of http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
    /// which is what you want for a unicode encoded webpage.
    ///
    /// - Parameter isUnicode: `true` to call `gtm_stringByEscapingForHTML()`,
    ///   `false` to call `gtm_stringByEscapingForAsciiHTML()`.
    /// - Returns: The escaped string as a Swift `String` value.
    func stringByEncodingHTMLEntities(isUnicode: Bool) -> String {
        if isUnicode {
            return self.gtm_stringByEscapingForHTML()
        }
        return self.gtm_stringByEscapingForAsciiHTML()
    }

    /// Replaces newlines with `<br />` tags.
    ///
    /// A "\r\n" pair found at the start of a newline run produces a single `<br />`;
    /// otherwise every character in the run produces one `<br />` each.
    ///
    /// - Returns: The receiver with its line breaks replaced.
    func stringWithNewLinesAsBRs() -> String {
        let scanner = Scanner(string: self as String)
        scanner.charactersToBeSkipped = nil
        let newLineCharacters = CharacterSet(charactersIn: NSString.newLineCharacterString)
        var result = ""

        repeat {
            // Copy the characters that are not line breaks.
            var text: NSString?
            scanner.scanUpToCharacters(from: newLineCharacters, into: &text)
            if let text = text {
                result += text as String
            }

            if scanner.scanString("\r\n", into: nil) {
                // Combine \r\n into just one <br />.
                result += "<br />"
            } else {
                var lineBreaks: NSString?
                if scanner.scanCharacters(from: newLineCharacters, into: &lineBreaks), let lineBreaks = lineBreaks {
                    // Count UTF-16 units rather than Swift Characters: a "\r\n" inside the
                    // run is one grapheme cluster but must still yield two <br /> tags.
                    result += String(repeating: "<br />", count: lineBreaks.length)
                }
            }
        } while !scanner.isAtEnd

        return result
    }

    /// Collapses every run of newlines and whitespace into a single space and drops
    /// such runs entirely at the beginning and end of the string.
    ///
    /// - Returns: The normalised string.
    func stringByRemovingNewLinesAndWhitespace() -> String {
        let scanner = Scanner(string: self as String)
        scanner.charactersToBeSkipped = nil
        let newLineAndWhitespaceCharacters = CharacterSet(charactersIn: NSString.newLineAndWhitespaceCharacterString)
        var result = ""

        while !scanner.isAtEnd {
            // Copy the characters that are neither whitespace nor line breaks.
            var text: NSString?
            scanner.scanUpToCharacters(from: newLineAndWhitespaceCharacters, into: &text)
            if let text = text {
                result += text as String
            }

            // Replace the following run with one space, except at either end of the result.
            if scanner.scanCharacters(from: newLineAndWhitespaceCharacters, into: nil) {
                if !result.isEmpty && !scanner.isAtEnd {
                    result += " "
                }
            }
        }

        return result
    }

    /// Wraps plain URLs in `<a href="..." class="linkified">...</a>`.
    ///
    ///  - Ignores URLs inside tags (any URL preceded by `="`)
    ///  - HTTP & HTTPS schemes only
    ///  - Expression: (?<!=")\b((http|https):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&amp;:/~\+#]*[\w\-\@?^=%&amp;/~\+#])?)
    ///  - Adapted from http://regexlib.com/REDetails.aspx?regexp_id=96
    ///
    /// - Returns: The linkified string, or the receiver unchanged if the regular
    ///   expression cannot be created.
    func stringByLinkifyingURLs() -> String {
        let pattern = "(?<!=\")\\b((http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,@?^=%%&amp;:/~\\+#]*[\\w\\-\\@?^=%%&amp;/~\\+#])?)"

        // The pattern is a constant, so a failure here is not expected; falling back to the
        // unmodified string keeps the documented "returns self if not supported" behaviour.
        guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
            return self as String
        }

        return regex.stringByReplacingMatches(
            in: self as String,
            options: [],
            range: NSRange(location: 0, length: self.length),
            withTemplate: "<a href=\"$1\" class=\"linkified\">$1</a>"
        )
    }

    /// Deprecated - please use `stringByConvertingHTMLToPlainText()` instead.
    ///
    /// Collects every distinct `<...>` tag in the receiver, replaces each occurrence with a
    /// space (or with nothing for a small set of inline tags) and then normalises whitespace
    /// via `stringByRemovingNewLinesAndWhitespace()`.
    ///
    /// - Returns: The receiver with its tags removed.
    func stringByStrippingTags() -> String {
        let source = self as String

        // Short-cut: without a "<" there are no tags to strip.
        guard self.range(of: "<", options: .literal).location != NSNotFound else {
            return source
        }

        // Scan and collect all distinct tags.
        let scanner = Scanner(string: source)
        scanner.charactersToBeSkipped = nil
        var tags = Set<String>()
        repeat {
            var tag: NSString?
            scanner.scanUpTo("<", into: nil)
            scanner.scanUpTo(">", into: &tag)
            if let tag = tag {
                // The scan stops before ">", so add it back to form the full tag.
                tags.insert("\(tag)>")
            }
        } while !scanner.isAtEnd

        // These exact tags are removed without leaving a space behind.
        let inlineTags: Set<String> = [
            "<a>", "</a>", "<span>", "</span>", "<strong>", "</strong>", "<em>", "</em>"
        ]

        var result = source
        for tag in tags {
            let replacement = inlineTags.contains(tag) ? "" : " "
            result = result.replacingOccurrences(of: tag, with: replacement, options: .literal)
        }

        // Remove multi-spaces and line breaks.
        return (result as NSString).stringByRemovingNewLinesAndWhitespace()
    }

}
//
//  NSString+HTML.m
//  MWFeedParser
//
//  Copyright (c) 2010 Michael Waterfall
//
//  Permission is hereby granted, free of charge, to any person obtaining a copy
//  of this software and associated documentation files (the "Software"), to deal
//  in the Software without restriction, including without limitation the rights
//  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the Software is
//  furnished to do so, subject to the following conditions:
//
//  1. The above copyright notice and this permission notice shall be included
//     in all copies or substantial portions of the Software.
//
//  2. This Software cannot be used to archive or collect data such as (but not
//     limited to) that of events, news, experiences and activities, for the
//     purpose of any concept relating to diary/journal keeping.
//
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
//  THE SOFTWARE.
//